/* 
Inputs required
1) VZ_mw_state_quarterly_new from Cengiz, Dube, Lindner, Zipperer (2019) replication
2) FedMW_DOL from the Department of Labor, pulling the second sheet "Extended" which I constructed from that
3) State_MWs_FRED/STTMINWGXX for each state (XX), pulled from FRED
4) cps-to-census.dta is the concordance between state codes
*/

clear all
pause off

cd "/Users/jvogel/Dropbox/JvogelPrivateWork/Cannonical_Model_MW/Replication/Data/"

/*******************************************************************************
           Use Cengiz, Dube, Lindner, Zipperer (2019)
Contains: Quarterly mean, min, max observations for each state and Fed btw 1974q2 - 2017q1

Take average across quarters of average within quarters 
(not the same as in state-level analysis)
Use 1974 from this dataset but not 2017
*******************************************************************************/

/******************************
  (1) Construct state series for 1974-2016
******************************/

use "Input/VZ_mw_state_quarterly_new", clear

* create year and quarter variables
rename quarterly_date qd
gen year = year(dofq(qd))
gen quarter = quarter(dofq(qd))
* check that state mean is weakly greater than federal
gen diff = mean_mw - mean_fed_mw
assert diff>-0.00001
* use the average across quarters of the quarterly average
bys year statefips: egen mw=mean(mean_mw)
* keep year, avg mw, and state; then drop duplicates
keep year statefips mw
duplicates drop
* only have q1 of 2017, so drop it
drop if year==2017

* Save it
save "Output/state_year_combo.dta", replace

/******************************
  (2) Construct federal series for 1974-2016
******************************/

use "Input/VZ_mw_state_quarterly_new", clear

* create year and quarter variables
rename quarterly_date qd
gen year = year(dofq(qd))
gen quarter = quarter(dofq(qd))
* use the average across quarters of the quarterly average
bys year: egen fedmw=mean(mean_fed_mw)
* keep year and avg mw; then drop duplicates
keep year fedmw
duplicates drop
drop if year>2016

* Save it
save "Output/fedminCDLZ.dta", replace

/*******************************************************************************
State minimum wages for years < 1974
Federal m.w. from DOL
State m.w. from FRED
*******************************************************************************/

**********
**** Combine federal minimum wages from the DOL for <1974 to CDLZ data
**********

import excel "Input/FedMW_DOL.xlsx", sheet("Extended") firstrow clear
reshape wide mw, i(Year) j(Month)
reshape long mw, i(Year) j(Month)
sort Year Month
replace mw=mw[_n-1] if mw==.
drop if Year<1963 | Year>2016
drop if mw==.
rename Year year
rename Month month
bys year: gen count=_N
assert count==12
drop count

* Create maximum minimum wage across months at the federal level
bys year: egen MW=max(mw)
keep year MW
duplicates drop
rename MW fedmw
drop if year>=1974 & year<=2016  // keep only those years for which CDLZ is missing
append using "Output/fedminCDLZ.dta"
sort year
assert fedmw>=fedmw[_n-1] if year>1963
label variable fedmw "nominal federal minimum wage in year"
bys year: gen count=_N
assert count==1
drop count
save "Output/fedmin.dta", replace

**********
**** State minimum wages from FRED + national mw from DOL for <1974 and >2016
**********

import delimited "Input/State_MWs_FRED/STTMINWGAK.csv", clear 
replace date=substr(date,1,4)
rename date year
destring year, replace
rename sttminwg state_min
gen state="AK"
sum year
local min=r(min)
assert year>=`min' & year~=.
local min1=`min'-1
forval y=1963(1)`min1' {
  set obs `=_N+1'
  replace year=`y' if year==.
}
replace state="AK" if state==""
save "Output/state_year_FRED.dta", replace

foreach state in AR AZ CA CO CT DC DE FL GA HI IA ID IL IN KS KY MA MD ME MI MN MO MT NC ND NE NH NJ NM NV NY OH OK OR PA RI SD TX UT VA VT WA WI WV WY {
  import delimited "Input/State_MWs_FRED/STTMINWG`state'.csv", clear 
  replace date=substr(date,1,4)
  rename date year
  destring year, replace
  rename sttminwg state_min
  sum year
  local min=r(min)
  assert year>=`min' & year~=.
  local min1=`min'-1
  forval y=1963(1)`min1' {
    set obs `=_N+1'
    replace year=`y' if year==.
  }
  gen state="`state'"
  append using "Output/state_year_FRED.dta"
  save "Output/state_year_FRED.dta", replace
}
label variable state_min "Annual nominal state m.w. from FRED"
rename state sname

* AL, LA, MS, SC, and TN never have state minimum wages, so not in FRED
assert sname~=""
assert year~=.
set obs `=_N+1'
replace sname="AL" if sname==""
set obs `=_N+1'
replace sname="LA" if sname==""
set obs `=_N+1'
replace sname="MS" if sname==""
set obs `=_N+1'
replace sname="SC" if sname==""
set obs `=_N+1'
replace sname="TN" if sname==""
replace year=2019 if year==.

* dataset has missing observations in AL, LA, MS, SC, and TN: reshape to replace them
reshape wide state_min, i(sname) j(year)
reshape long state_min, i(sname) j(year)
bys sname: gen count=_N
assert count==60
drop count

drop if year>2016

* merge with federal minimum wages
merge m:1 year using "Output/fedmin.dta"
assert _merge==3
drop _merge
gen mw=max(state_min, fedmw)
assert mw~=.
keep year sname mw

* only keep the years for which don't have data from CDLZ
drop if year>=1974 & year<=2016

/*******************************************************************************
Combine CDLZ (1974-2016) w/ (FRED + DOL) for <1974
*******************************************************************************/

* bring in statefips
rename sname abbrev
merge m:1 abbrev using "Input/cps-to-census.dta"
assert _merge==3
rename state_cen statefips
keep year mw statefip

* combine with CDLZ
append using "Output/state_year_combo.dta"
bys statefips: gen count=_N
assert count==54 // 2016-1963+1 (since data from 1963-2016)
assert mw~=.
drop count

* save it
sort statefips year
label variable mw "Nominal annual state applied mw"
save "Output/state_year_combo.dta", replace

* remove datasets constructed along the way (fedmin is used in CM_mw.do)
rm "Output/state_year_FRED.dta"
rm "Output/fedminCDLZ.dta"
